clear 

*Combines twcget_new and collapsetwc into one program

*Gets the datasets for 2003 and 2004
* note: the 2004 quarter 4 raw data are corrupted, so that quarter is skipped below

*cap do "${fmartorell_home}/top_program.do"

set more off

# delimit;

/* From here on every statement ends with a semicolon.
   Paths formerly rooted at ${fmartorell_home} (remediation/programs, remediation/data, tmp)
   now live under ${d1} (log, data, tmp). Paths are quoted so a root containing
   blanks still works. */

/* Close a log left open by an earlier run that stopped part-way; without this
   -log using- would stop with "log file already open". */
capture log close;
log using "${d1}log/twc_for_remediation", replace;

/* Master id list: keep only altpid and require exactly one record per altpid.
   -bysort- also leaves the file sorted by altpid, which the old-style -merge-
   used later expects of its using file. */
use altpid using "${d1}data/tasp192_200_withall", clear;
bysort altpid: assert _N==1;
save "${d1}tmp/tasp", replace;

/* list of ssn's from TAAS: see twcget.do */
/* taasn = number of records (= number of distinct altpid) in the master list;
   used as the denominator of the match rate reported for each quarter. */
d using "${d1}tmp/tasp";
global taasn=r(N);

/* --------------------------------------------------------------------------------------
Now create the TWC datasets:
1 for each quarter keeping only those records matching to the master TAAS list
----------------------------------------------------------------------------------------- */;

/* cpi YEAR QUARTER
   Deflates the variable -wages- in the data in memory, in place:
       wages = wages * (172.2 / CPI[YEAR, QUARTER])
   172.2 is the base the caller describes as the 2000 average CPI.
   CPI values are from ftp://ftp.bls.gov/pub/special.requests/cpi/cpiai.txt, months 2, 5, 7, 11
   (one value per quarter), for 1989-2004.
   Stops with return code 198 when YEAR/QUARTER is not in the table.
   -capture program drop- lets the do-file be rerun in the same Stata session. */
capture program drop cpi;
program define cpi;
 args year qtr;

 /* quarterly CPI by year: q1 q2 q3 q4 */
 local cpi1989 "121.6 123.8 124.4 125.9";
 local cpi1990 "128 129.2 130.4 133.8";
 local cpi1991 "134.8 135.6 136.2 137.8";
 local cpi1992 "138.6 139.7 140.5 142";
 local cpi1993 "143.1 144.2 144.4 145.8";
 local cpi1994 "146.7 147.5 148.4 149.7";
 local cpi1995 "150.9 152.2 152.5 153.6";
 local cpi1996 "154.9 156.6 157 158.6";
 local cpi1997 "159.6 160.1 160.5 161.5";
 local cpi1998 "161.9 162.8 163.2 164";
 local cpi1999 "164.5 166.2 166.7 168.3";
 local cpi2000 "169.8 171.5 172.8 174.1";
 local cpi2001 "175.8 177.7 177.5 177.4";
 local cpi2002 "177.8 179.8 180.1 181.3";
 local cpi2003 "183.1 183.5 183.9 184.5";
 local cpi2004 "186.2 189.1 189.4 191";

 /* Look up the index; it stays empty when the year or quarter is not covered. */
 local index "";
 if inlist("`qtr'", "1", "2", "3", "4") {;
   local index : word `qtr' of `cpi`year'';
 };
 if "`index'"=="" {;
   display as error "cpi: no CPI value for year `year', quarter `qtr'";
   exit 198;
 };

 /* Deflating must not create or remove missing wages: compare counts before and after. */
 qui count if missing(wages);
 local nmiss_before=r(N);

  replace wages=wages*(172.2/`index');

 qui count if missing(wages);
 assert r(N)==`nmiss_before';

end;




# delimit ;

/* For every quarter (outer loop) and year 1991-2004 (inner loop):
     1. locate the raw wage file C4w<year>_<quarter>.dta
        (2003 onward under ${d1}data/twc, earlier years under ${nh_data}/TWC;
         the 2003+ files formerly lived under ${fmartorell_home}/data/twc);
     2. keep only wage records whose altpid is in the master list ${d1}tmp/tasp
        and save them as ${d1}data/newtwc/twc<year>_<quarter>;
     3. deflate wages with -cpi-, collapse to one record per altpid and save as
        ${d1}data/newtwc/uniquetwc<year>_<quarter>.
   Quarters with no raw file are reported and skipped; 2004(4) is always skipped. */
forvalues q=1/4 {;
 disp "****************************************************************************";
 disp "****************************************************************************";
 disp "Quarter=`q'"; 
 disp "$S_FNDATE";
 disp " ";
 forvalues y=1991/2004 {;
  disp "*************** Year=`y'(`q') ********************";

  if `y'>2002 {;
    local datapath="${d1}data/twc";
  };
  else {;
   local datapath="${nh_data}/TWC";
  };

  /* Store the return code at once so later commands cannot disturb the test. */
  cap confirm file "`datapath'/C4w`y'_`q'.dta";
  local rawfile_rc=_rc;

  if `rawfile_rc'~=0 {;
    disp "No data!!!!!";
  };
  else if `y'==2004 & `q'==4 {;
    /* Don't run for 2004(4) since raw data is corrupted */
    disp "Skipping 2004(4): raw data is corrupted";
  };
  else {;
     qui use "`datapath'/C4w`y'_`q'.dta", clear;

     /* Old-style match merge: the using file has one record per altpid (uniqusing);
        nokeep drops using-only records, so _merge==2 should never occur. */
     merge altpid using "${d1}tmp/tasp", uniqusing nokeep;
     tab _merge;
     qui count if _merge==3;
     local n=r(N);
     disp "Num matches=`n'";

     /* Num matches counts wage records, and one altpid can have several records.
        The match rate is therefore based on distinct matched altpid, so it is a
        true share of the master list and cannot exceed 100. */
     tempvar firstrec;
     bysort altpid: gen byte `firstrec'=(_n==1);
     qui count if `firstrec' & _merge==3;
     local npersons=r(N);
     drop `firstrec';
     disp "Num TAAS rec's matched=`npersons'";
     disp "% TAAS rec's matching=" %5.2f 100*`npersons'/$taasn;

     assert _merge~=2;
     qui keep if _merge==3;
     qui summ wages, detail; 
     disp "Mean=" %5.2f r(mean) "  Median=" %5.2f r(p50) "  25p=" %5.2f r(p25) "  75p=" %5.2f r(p75) "  Max=" %5.2f r(max);

     /* Distribution of the number of wage records per person */
     bysort altpid: gen numperaltpid=_N;
     tab numperaltpid;
     drop numperaltpid _merge;

     /* Formerly ${fmartorell_home}/remediation/data/newtwc.
        NOTE(review): the original comment says "save in the ex. exam directory";
        confirm that ${d1}data/newtwc is that location. */
     save "${d1}data/newtwc/twc`y'_`q'", replace;

     /* ------------------------------------------------------------------
        From collapsetwc_new.do
        ------------------------------------------------------------------ */

     /* count # missing wages */
     qui count if wages==.;
     disp "Num w/ missing wages=" r(N);

     /* 1 for a strictly positive, non-missing wage record; 0 otherwise */
     gen byte poswage=wages>0 & wages<.;

     /* deflate wages, convert using 2000 average cpi */
     cpi `y' `q';

     /* now collapse to a single obs per altpid:
        sumwages = total wages, npos = # positive-wage records, n = # records */
     collapse (sum) sumwages=wages (sum) npos=poswage (count) n=poswage, by(altpid);

     /* set the counts to 5 if they're greater than 5 (should have very few) */
     qui replace npos=5 if npos>5;
     qui replace n=5 if n>5;

     tabstat sumwages, by(npos) stat(mean count);
     tabstat sumwages, by(n) stat(mean count);

     qui summ sumwages if npos>0 & npos<5, detail;
     disp "Dist. of sumwages (at least 1 pos obs and fewer than 5 pos obs)";
     disp "  Mean=" %5.2f r(mean) "  Median=" %5.2f r(p50) "  25p=" %5.2f r(p25) "  75p=" %5.2f r(p75) "  Max=" %5.2f r(max);

     qui summ sumwages if n<5, detail;
     disp "Dist. of sumwages (fewer than 5 obs including zeroes)";
     disp "  Mean=" %5.2f r(mean) "  Median=" %5.2f r(p50) "  25p=" %5.2f r(p25) "  75p=" %5.2f r(p75) "  Max=" %5.2f r(max);
     di " ";
     di " ";

     sort altpid;
     qui compress;
     /* formerly ${fmartorell_home}/remediation/data/newtwc */
     qui save "${d1}data/newtwc/uniquetwc`y'_`q'", replace;

  }; /* close if raw data file exists condition */
 }; /* close year loop */
}; /* close quarter loop */

log close;
